{
    "nbformat": 4,
    "nbformat_minor": 2,
    "metadata": {
        "kernelspec": {
            "display_name": "Python 3",
            "language": "python",
            "name": "python3"
        },
        "language_info": {
            "codemirror_mode": {
                "version": 3,
                "name": "ipython"
            },
            "mimetype": "text/x-python",
            "file_extension": ".py",
            "version": "3.6.4",
            "pygments_lexer": "ipython3",
            "nbconvert_exporter": "python",
            "name": "python"
        }
    },
    "cells": [
        {
            "cell_type": "markdown",
            "metadata": {},
            "source": [
                "# [HW4] Problem: Overparameterized Linear Regression\n"
            ]
        },
        {
            "execution_count": null,
            "cell_type": "code",
            "metadata": {},
            "source": [
                "import numpy as np\n",
                "import matplotlib.pyplot as plt\n",
                "%matplotlib inline\n",
                "from ipywidgets import interactive\n",
                "import ipywidgets as widgets\n",
                "from ipywidgets import fixed\n",
                "\n",
                "# Import various helpful functions\n",
                "from helpers import *\n"
            ],
            "outputs": []
        },
        {
            "cell_type": "markdown",
            "metadata": {},
            "source": [
                "## Part (g): Solve the weighted LS problem\n",
                "\n",
                "**Solve the weighted least-squares regression to obtain the final coefficients $\\hat{\\boldsymbol{\\alpha}}$** using the sklearn `LinearRegression` model. Remember that we first find the coefficients for _weighted_ features $\\hat{\\boldsymbol{\\beta}}$, then use the weights to find the final coefficients.\n"
            ]
        },
        {
            "execution_count": null,
            "cell_type": "code",
            "metadata": {},
            "source": [
                "from sklearn.linear_model import LinearRegression\n",
                "def solve_ls(phi, y, weights=None):\n",
                "    d = phi.shape[1]\n",
                "    if weights is None:\n",
                "        weights  = np.ones(d)\n",
                "    phi_weighted = weights*phi\n",
                "    LR = LinearRegression(fit_intercept=False, normalize=False)\n",
                "    # TODO: Train the linear regressor to obtain the weighted coefficients beta, then\n",
                "    #       use the weights to obtain the final coefficients alpha\n",
                "    ### start g1 ###\n",
                "\n",
                "    ### end g1 ###\n",
                "    loss = np.mean((y - phi @ alpha.T)**2)\n",
                "    return alpha, loss\n"
            ],
            "outputs": []
        },
        {
            "cell_type": "markdown",
            "metadata": {},
            "source": [
                "Next, we have implemented for you a solve function that calculates the regression coefficients for the cases in parts (a)-(c). Take a look at the coefficients learned in each case, and notice how the noise energy is aliased across $d > n$ features.\n"
            ]
        },
        {
            "execution_count": null,
            "cell_type": "code",
            "metadata": {},
            "source": [
                "#Set randomness\n",
                "seed = 127\n",
                "np.random.seed(seed)\n",
                "\n",
                "s = 25\n",
                "n = 128\n",
                "d = 257\n",
                "num_training_noise_seeds = 10\n",
                "phi_type = 'fourier'\n",
                "x_type = 'grid'\n",
                "f_type = 'cos2'\n",
                "awgn_std = 2e-1\n",
                "n_test = 10000\n",
                "noise_seed_idx = 5\n",
                "gamma = 0.9\n",
                "\n",
                "\n",
                "def solve(s, n, d, num_training_noise_seeds,\n",
                "          phi_type, x_type, f_type,\n",
                "          awgn_std, n_test, noise_seed_idx,\n",
                "          gamma,\n",
                "          plot_all=True):\n",
                "    # TODO: print SU, CNs, CNe values based on params\n",
                "    assert(d >= n)\n",
                "    assert( d >= s)\n",
                "    x_train = generate_x(x_type=x_type, n=n)\n",
                "    phi_train = featurize(x_train, d, phi_type)\n",
                "    y_train = generate_y(x=x_train, f_type=f_type)\n",
                "\n",
                "    x_test= generate_x(x_type = 'uniform_random', n=n_test)\n",
                "    y_test = generate_y(x=x_test, f_type=f_type)\n",
                "    phi_test = featurize(x_test, d, phi_type)\n",
                "\n",
                "    weights = get_bilevel_weights(s,gamma,d)\n",
                "    plot_weights(weights, gamma, s)\n",
                "\n",
                "    # Expected prediction error\n",
                "    lambd = s * (1 - gamma) / (n * gamma)\n",
                "    SU = 1. / (1 + lambd)\n",
                "    CNs_sqr = (n / d) * (lambd ** 2 / (1 + lambd) ** 2)\n",
                "    CNe_sqr = awgn_std ** 2 * ((s / n) * ((1 + n * lambd ** 2 / d) / (1 + lambd) ** 2) + (n - s) / d)\n",
                "    print(\"(1-SU)^2: {:.4f}, CNs^2: {:.4f}, CNe^2: {:.4f}\".format(\n",
                "            (1-SU)**2, CNs_sqr, CNe_sqr))\n",
                "    print(\"lambda: \"+str(round(lambd,4)))\n",
                "\n",
                "    # Generate noise\n",
                "    noise = np.random.normal(0, awgn_std, size = [y_train.shape[0], num_training_noise_seeds])\n",
                "    y_train_noisy = y_train[:,None] + noise\n",
                "\n",
                "    # Solve the noiseless case\n",
                "    coeffs_noiseless, loss_noiseless  = solve_ls(phi_train, y_train, weights)\n",
                "    true_coeffs = np.zeros_like(coeffs_noiseless)\n",
                "    true_coeffs[:n] = solve_ls(phi_train[:,:n], y_train)[0]\n",
                "    y_test_pred_noiseless = phi_test @ coeffs_noiseless\n",
                "    if plot_all:\n",
                "        plt.figure(figsize=[16, 9])\n",
                "        plt.subplot(2,3,1)\n",
                "        plot_prediction(x_train, y_train, x_test, y_test, y_test_pred_noiseless, show=not plot_all)\n",
                "        plt.subplot(2,3,4)\n",
                "        plot_coeffs(coeffs_noiseless, true_coeffs, title = 'Signal only', show=not plot_all)\n",
                "    pred_error_noiseless = np.mean((y_test_pred_noiseless - y_test)**2)\n",
                "    print(\"Noiseless Training Loss: \", loss_noiseless)\n",
                "    print(\"Noiseless Prediction Error: \", round(pred_error_noiseless,3))\n",
                "\n",
                "    # Solve the pure noise case\n",
                "    coeffs_noise, loss_noise  = solve_ls(phi_train, noise, weights)\n",
                "    coeffs_noise = coeffs_noise.T\n",
                "    true_coeffs_noise = np.zeros(d)\n",
                "    y_test_pred_noise = phi_test @ coeffs_noise\n",
                "    if plot_all:\n",
                "        plt.subplot(2,3,2)\n",
                "        plot_prediction(x_train, noise[:,noise_seed_idx], x_test, np.zeros_like(x_test), y_test_pred_noise[:,noise_seed_idx], show=not plot_all)\n",
                "        plt.subplot(2,3,5)\n",
                "        plot_coeffs(coeffs_noise[:,noise_seed_idx], true_coeffs_noise, title = 'Noise only', show=not plot_all)\n",
                "    pred_error_noise = np.mean((y_test_pred_noise)**2)\n",
                "    print(\"Pure Noise Training Loss: \", loss_noise)\n",
                "    print(\"Pure Noise Prediction Error: \", round(pred_error_noise,3))\n",
                "\n",
                "    # Solve the signal + noise case\n",
                "    # Here we take advantage of the linearity of the interpolator to avoid solving\n",
                "    # another LS problem.\n",
                "#     coeffs, loss  = solve_ls(phi_train, y_train_noisy, weights)\n",
                "#     coeffs = coeffs.T\n",
                "    coeffs = coeffs_noise + coeffs_noiseless[:,None]\n",
                "    loss =  np.mean((phi_train @ coeffs - y_train_noisy)**2)\n",
                "    y_test_pred = phi_test @ coeffs\n",
                "\n",
                "    if plot_all:\n",
                "        plt.subplot(2,3,3)\n",
                "    plot_prediction(x_train, y_train_noisy[:,noise_seed_idx], x_test, y_test, y_test_pred[:,noise_seed_idx], show=not plot_all)\n",
                "\n",
                "    if plot_all:\n",
                "        plt.subplot(2,3,6)\n",
                "    plot_coeffs(coeffs[:,noise_seed_idx], true_coeffs, title = 'Signal + Noise', show=not plot_all)\n",
                "    pred_error = np.mean((y_test_pred - y_test[:,None])**2)\n",
                "    print(\"Final Training Loss: \", loss)\n",
                "    print(\"Final Prediction Error: \", round(pred_error,3))\n",
                "\n",
                "\n",
                "solve(s, n, d,num_training_noise_seeds, phi_type,x_type,f_type,awgn_std,n_test,noise_seed_idx,gamma)\n"
            ],
            "outputs": []
        },
        {
            "cell_type": "markdown",
            "metadata": {},
            "source": [
                "Play around with the interactive plot below. **Comment on the interactions between $\\gamma$ and the prediction error, $s$ and the prediction error, $d$ and the prediction error, and the behavior as $\\sigma_{awgn}$ increases.**\n"
            ]
        },
        {
            "execution_count": null,
            "cell_type": "code",
            "metadata": {},
            "source": [
                "interactive_plot = interactive(solve,\n",
                "                               s=generate_s_widget(50),\n",
                "                               n=fixed(200),\n",
                "                               d=generate_d_widget(),\n",
                "                               num_training_noise_seeds=fixed(num_training_noise_seeds),\n",
                "                               phi_type=fixed(phi_type), x_type=fixed(x_type), f_type=fixed(f_type),\n",
                "                               awgn_std=generate_awgn_std_widget(),\n",
                "                               n_test=fixed(n_test),\n",
                "                               noise_seed_idx=fixed(noise_seed_idx),\n",
                "                               gamma=generate_gamma_widget(),\n",
                "                               plot_all=fixed(True))\n",
                "interactive_plot\n"
            ],
            "outputs": []
        },
        {
            "cell_type": "markdown",
            "metadata": {},
            "source": [
                "_Your comments here..._\n"
            ]
        },
        {
            "cell_type": "markdown",
            "metadata": {},
            "source": [
                "Finally, use the function `vary_everything_together` to set a constant ratio for $s$, $d$, and $n$, then explore the behavior as $n$ varies. **Comment on the prediction error as $n$ varies for several combinations of $p$, $q$, and $r$.** You may want to restrict yourself to $n \\leq 250$ and $q\\leq 1.5$, as solving the regression problem becomes slow.\n"
            ]
        },
        {
            "execution_count": null,
            "cell_type": "code",
            "metadata": {},
            "source": [
                "def vary_everything_together(n, p, q, r):\n",
                "    \"\"\"\n",
                "    s = n^p\n",
                "    d = n^q (q >= 1)\n",
                "    gamma = n^(-r) (0 <= r <= q-p to favor first s features)\n",
                "    \"\"\"\n",
                "    s = int(n**p)\n",
                "    d = int(n**q)\n",
                "    if d%2 == 0:\n",
                "        d += 1\n",
                "    gamma = n**(-r)\n",
                "    if gamma < s/d:\n",
                "        print(\"WARNING: No longer favoring first s features\")\n",
                "    return s,d,gamma\n",
                "\n",
                "# TODO: Vary n, for various combinations of p, q, and r\n",
                "n = 250\n",
                "p = 0.8\n",
                "q = 1.5\n",
                "r = 0.05\n",
                "\n",
                "s, d, gamma = vary_everything_together(n, p, q, r)\n",
                "print(\"s: %d, d: %d, gamma: %f\" % (s, d, gamma))\n",
                "print(\"s/n: %f, n/d: %f, lambda: %f\" % (s/n, n/d, s * (1 - gamma) / (n * gamma)))\n",
                "solve(s, n, d, num_training_noise_seeds,\n",
                "      phi_type, x_type, f_type,\n",
                "      awgn_std, n_test, noise_seed_idx, gamma)\n"
            ],
            "outputs": []
        },
        {
            "cell_type": "markdown",
            "metadata": {},
            "source": [
                "_Your comments here..._\n"
            ]
        },
        {
            "cell_type": "markdown",
            "metadata": {},
            "source": [
                "## Part (i)\n",
                "### Connection to ridge regression\n",
                "\n",
                "The $\\lambda$ parameter induced by the extra features can be used (with appropriate scaling) in ridge regression with the low frequency features. Below you can see that the survival of the true coefficient and the shape of the learned functions are almost the same. **Explore how s and $\\gamma$ affect $\\lambda$ and the correspondence between the two versions of regression.**\n"
            ]
        },
        {
            "execution_count": null,
            "cell_type": "code",
            "metadata": {},
            "source": [
                "#Set randomness\n",
                "seed = 127\n",
                "np.random.seed(seed)\n",
                "\n",
                "# s = 25\n",
                "# n = 128\n",
                "# d = 257\n",
                "num_training_noise_seeds = 10\n",
                "phi_type = 'fourier'\n",
                "x_type = 'grid'\n",
                "f_type = 'cos2'\n",
                "awgn_std = 2e-1\n",
                "n_test = 10000\n",
                "noise_seed_idx = 5\n",
                "gamma = 0.9\n",
                "\n",
                "from helpers import solve2\n"
            ],
            "outputs": []
        },
        {
            "execution_count": null,
            "cell_type": "code",
            "metadata": {},
            "source": [
                "interactive_plot = interactive(solve2,\n",
                "                               s=generate_s_widget(50),\n",
                "                               n=fixed(100),\n",
                "                               d=fixed(1001),\n",
                "                               num_training_noise_seeds=fixed(num_training_noise_seeds),\n",
                "                               phi_type=fixed(phi_type), x_type=fixed(x_type), f_type=fixed(f_type),\n",
                "                               awgn_std=fixed(0),\n",
                "                               n_test=fixed(n_test),\n",
                "                               noise_seed_idx=fixed(noise_seed_idx),\n",
                "                               gamma=generate_gamma_widget(0.5),\n",
                "                               plot_all=fixed(True))\n",
                "interactive_plot\n"
            ],
            "outputs": []
        },
        {
            "cell_type": "markdown",
            "metadata": {},
            "source": [
                "### Impulse response of training point\n",
                "\n",
                "The plots below visualize the impulse response of a single training point, $\\mathbf{y} = [0,\\ldots,1,0,\\ldots,0]$. **Explore how the shape of the function using the first $s$ features changes with $s$ and how it impacts the learned function. Explore how the full impulse reponse changes shape with $d$ and $\\gamma$ and how that impacts the learned function.**\n"
            ]
        },
        {
            "execution_count": null,
            "cell_type": "code",
            "metadata": {},
            "source": [
                "#Set randomness\n",
                "seed = 127\n",
                "np.random.seed(seed)\n",
                "\n",
                "s = 12\n",
                "n = 32\n",
                "d = 1025\n",
                "num_training_noise_seeds = 10\n",
                "phi_type = 'fourier'\n",
                "x_type = 'grid'\n",
                "f_type = 'cos2'\n",
                "awgn_std = 2e-1\n",
                "n_test = 10000\n",
                "noise_seed_idx = 5\n",
                "gamma = 0.9\n",
                "\n",
                "from helpers import solve3\n",
                "# solve3(s, n, d, num_training_noise_seeds,\n",
                "#       phi_type, x_type, f_type,\n",
                "#       awgn_std, n_test, noise_seed_idx, gamma)\n"
            ],
            "outputs": []
        },
        {
            "execution_count": null,
            "cell_type": "code",
            "metadata": {},
            "source": [
                "interactive_plot = interactive(solve3,\n",
                "                               s=generate_s_widget(11, max_s=n),\n",
                "                               n=fixed(32),\n",
                "                               d=generate_d_widget(),\n",
                "                               num_training_noise_seeds=fixed(num_training_noise_seeds),\n",
                "                               phi_type=fixed(phi_type), x_type=fixed(x_type), f_type=fixed(f_type),\n",
                "                               awgn_std=fixed(0),\n",
                "                               n_test=fixed(n_test),\n",
                "                               noise_seed_idx=fixed(noise_seed_idx),\n",
                "                               gamma=generate_gamma_widget(0.9, gamma_min=s/d),\n",
                "                               plot_all=fixed(True))\n",
                "interactive_plot\n"
            ],
            "outputs": []
        },
        {
            "execution_count": null,
            "cell_type": "code",
            "metadata": {},
            "source": [],
            "outputs": []
        }
    ]
}